This project quickly conduct the appropriate SQL queries from 2 input XML files customer.xml
and nation.xml
. Create the tables in the database from the XML files and perform retrival operations on the tables that were created.
Output: customer.db
, desiredCustomers.txt
, marketSegments.txt
and nationsBalance.txt
.
In [1]:
import xml.etree.ElementTree as ET
import sqlite3 as sqlite
import sys
In [2]:
def check_result(result, desired):
if result != desired:
print "NOT_OK. Expected " + desired + "; but got "+result
else:
print "OK!"
return
In [3]:
def read_xml_file(xml_filename, field_types):
tree = ET.parse(xml_filename)
root = tree.getroot()
lot = []
for child in root:
fields = []
for gc in child:
fields.append(gc.text)
field_list = []
count = 0
for f in fields:
t = field_types[count]
if t == 'int':
field_list.append(int(f))
elif t == 'string':
field_list.append(f)
elif t == 'float':
field_list.append(float(f))
else:
field_list.append(f)
count = count + 1
lot.append(tuple(field_list))
return lot;
The format of the customer.xml file records:
<T>
<C_CUSTKEY>1</C_CUSTKEY>
<C_NAME>Customer#000000001</C_NAME>
<C_ADDRESS>IVhzIApeRb ot,c,E</C_ADDRESS>
<C_NATIONKEY>15</C_NATIONKEY>
<C_PHONE>25-989-741-2988</C_PHONE>
<C_ACCTBAL>711.56</C_ACCTBAL>
<C_MKTSEGMENT>BUILDING</C_MKTSEGMENT>
<C_COMMENT>regular, regular platelets are fluffily according to the even attainments. blithely iron</C_COMMENT>
</T>
In [10]:
lot = read_xml_file(r'customer.xml',
['int', 'string', 'string', 'int', 'string', 'float', 'string', 'string'])
with sqlite.connect(r'customer.db') as con:
cur = con.cursor()
# Write a SQL query to erase any existing "customer" table in the database
cur.execute("drop table if exists customer;")
# Write a SQL query to create a new "customer" table with the following columns:
# C_CUSTKEY (integer) C_NAME (text) C_ADDRESS (text) C_NATIONKEY (integer) C_PHONE (text) C_ACCTBAL (real) C_MKTSEGMENT (text) C_COMMENT (text)
cur.execute("create table customer(C_CUSTKEY integer, C_NAME text, C_ADDRESS text, C_NATIONKEY integer, C_PHONE text, C_ACCTBAL real, C_MKTSEGMENT text, C_COMMENT text)")
# Write a SQL query to insert the values in the list-of-tuples variable lot into the customer table
cur.executemany("insert into customer values(?,?,?,?,?,?,?,?)", lot)
con.commit()
# Write a SQL query to compute the count of all records (rows) in the customer table
cur.execute("select Count(*) from customer;")
print "customer count: "
check_result(str(cur.fetchone()[0]), "1500")
# Write a SQL query to compute the average account balance (C_ACCTBAL) over all rows in the customer table.
cur.execute("select avg(C_ACCTBAL) from customer;")
print "customer average: "
check_result(str(cur.fetchone()[0]), "4454.57706")
# Write a SQL query to get the C_CUSTKEY, C_NAME, and C_ACCTBAL columns from the customer table that
# have C_ACCTBAL >= 1000, and sort the results by C_CUSTKEY
cur.execute("select C_CUSTKEY, C_NAME, C_ACCTBAL from customer where (C_ACCTBAL >= '1000') order by C_CUSTKEY; ")
# fetchall() method gets all records. It returns a list of tuples.
# Each of the tuples represent a row in the table.
rows = cur.fetchall()
f = open(r'desiredCustomers.txt', 'w')
f.write('Customer Key' + '\t' + 'Name' + '\t' + 'Account Balance\n')
for row in rows:
f.write('\t'.join([str(x) for x in row]) + '\n')
f.close()
# Write a SQL query to sum the C_ACCTBAL field by market segment C_MKTSEGMENT (call this total column T_ACCTBAL) and sorted by descending T_ACCTBAL
# Output columns should be C_MKTSEGMENT, and the sum over C_ACCTBAL (sorted by descending amount)
cur.execute("select C_MKTSEGMENT, sum(C_ACCTBAL) as T_ACCTBAL from customer group by C_MKTSEGMENT order by T_ACCTBAL desc; ")
# fetchall() method gets all records. It returns a list of tuples.
# Each of the tuples represent a row in the table.
rows = cur.fetchall()
f = open(r'marketSegments.txt', 'w')
f.write('Market Segment' + '\t' + 'Total Customer Account Balance\n')
for row in rows:
f.write('\t'.join([str(x) for x in row]) + '\n')
f.close()
The format of the nation.xml file records:
<T>
<N_NATIONKEY>0</N_NATIONKEY>
<N_NAME>ALGERIA</N_NAME>
<N_REGIONKEY>0</N_REGIONKEY>
<N_COMMENT>final accounts wake quickly. special request</N_COMMENT>
</T>
In [6]:
lot = read_xml_file(r'nation.xml', ['int', 'string', 'int', 'string'])
with sqlite.connect(r'customer.db') as con:
cur = con.cursor()
# Write a SQL query to erase any existing "nation" table in the database
cur.execute("drop table if exists nation;")
# Write a SQL query to create a new "nation" table with the columns: N_NATIONKEY (integer) N_NAME (string) N_REGIONKEY (integer) C_COMMENT (text)
cur.execute("create table nation(N_NATIONKEY integer, N_NAME string, N_REGIONKEY integer, C_COMMENT text)")
# Write a single SQL query and cursor command here to insert all the tuples into the "nation" table, using the list-of-tuples variable "lot"
cur.executemany("insert into nation values(?,?,?,?)", lot)
con.commit()
# Write a SQL query to compute the total account balance (C_ACCTBAL) in the customer table for each nation. The output columns
# should be N_NAME (country name) from the "nation" table and the total account balance, i.e. the sum of C_ACCTBAL in the "customer" table
# for records grouped by country. The result should be sorted alphabetically by country name.
cur.execute("select nation.N_NAME, sum(customer.C_ACCTBAL) from nation, customer where customer.C_NATIONKEY=nation.N_NATIONKEY group by nation.N_NAME order by nation.N_NAME")
# Each of the tuples represent a row in the table.
rows = cur.fetchall()
f = open(r'nationsBalance.txt', 'w')
f.write('Nation Name' + '\t' + 'Total Customer Account Balance\n')
for row in rows:
f.write('\t'.join([str(x) for x in row]) + '\n')
f.close()
In [ ]: